library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.0.1 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(gdata)
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following objects are masked from 'package:dplyr':
##
## combine, first, last
## The following object is masked from 'package:purrr':
##
## keep
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(pracma)
##
## Attaching package: 'pracma'
## The following object is masked from 'package:purrr':
##
## cross
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
Consideren los datos de tipo de cambio /USD de la sesión 06 (est46114 s06 data.csv).
data <- read.xls("data/est46114_s06_data.xls")
data <- data.frame(data[,-1], row.names = data[,1])
Visualizo la covarianza de los datos
covar <- cov(data)
Entiendo los datos:
head(m0) # Vector de dimensión n.col con ceros
## [,1]
## [1,] 0
## [2,] 0
## [3,] 0
## [4,] 0
## [5,] 0
## [6,] 0
dim(B0) # Matriz diagonal de dim n.col x n.col
## [1] 80 80
M <- 10000
mu.sim <- matrix(NA,nrow=M, ncol=ncol(data))
Lambda.sim <- array(NA,dim=c(M,ncol(data),ncol(data)))
e.sim <- matrix(NA,nrow=M, ncol=ncol(data))
V.sim <- array(NA,dim=c(M,ncol(data),ncol(data)))
C.sim <- array(NA,dim=c(M,nrow(data),ncol(data)))
m <- 1; X <- as.matrix(data)
for(m in 1:M){
# Simulacion (mu,Lambda)
Lambda.sim[m,,] <- rWishart(1, output[[3]], output[[4]])
mu.sim[m,] <- mvrnorm(1, mu=output[[1]], Sigma=solve(output[[2]]*Lambda.sim[m,,]), tol = 1e-6)
# Simulacion (e,V) + C
eigen_aux <- eigen(solve(Lambda.sim[m,,]))
e.sim[m,] <- eigen_aux$values
V.sim[m,,] <- eigen_aux$vectors
C.sim[m,,] <- X %*% V.sim[m,,]
}
Inferencia sobre vector e1
hist(e.sim[,1])
summary(e.sim[,2])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.007685 0.009088 0.009428 0.009466 0.009812 0.011838
Inferencia sobre el componente principal uno \(c_{i,1}\) de la observacion \(i=1\)
hist(C.sim[,1,1])
summary(C.sim[,1,1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.4471381 -0.0814793 0.0002974 -0.0019114 0.0780395 0.4279798
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 5.5974 4.4493 3.0130 2.20335 1.8825 1.34403 1.2199
## Proportion of Variance 0.3916 0.2475 0.1135 0.06068 0.0443 0.02258 0.0186
## Cumulative Proportion 0.3916 0.6391 0.7526 0.81325 0.8576 0.88013 0.8987
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 1.06892 0.97110 0.92674 0.72000 0.66657 0.63707
## Proportion of Variance 0.01428 0.01179 0.01074 0.00648 0.00555 0.00507
## Cumulative Proportion 0.91302 0.92480 0.93554 0.94202 0.94757 0.95265
## PC14 PC15 PC16 PC17 PC18 PC19
## Standard deviation 0.59244 0.56605 0.52873 0.50745 0.48225 0.45241
## Proportion of Variance 0.00439 0.00401 0.00349 0.00322 0.00291 0.00256
## Cumulative Proportion 0.95703 0.96104 0.96453 0.96775 0.97066 0.97322
## PC20 PC21 PC22 PC23 PC24 PC25
## Standard deviation 0.43226 0.41341 0.38326 0.36792 0.34837 0.33876
## Proportion of Variance 0.00234 0.00214 0.00184 0.00169 0.00152 0.00143
## Cumulative Proportion 0.97555 0.97769 0.97953 0.98122 0.98273 0.98417
## PC26 PC27 PC28 PC29 PC30 PC31
## Standard deviation 0.3347 0.29816 0.28928 0.28039 0.26494 0.25963
## Proportion of Variance 0.0014 0.00111 0.00105 0.00098 0.00088 0.00084
## Cumulative Proportion 0.9856 0.98668 0.98773 0.98871 0.98959 0.99043
## PC32 PC33 PC34 PC35 PC36 PC37
## Standard deviation 0.24532 0.22461 0.21483 0.21401 0.20832 0.20103
## Proportion of Variance 0.00075 0.00063 0.00058 0.00057 0.00054 0.00051
## Cumulative Proportion 0.99118 0.99181 0.99239 0.99296 0.99350 0.99401
## PC38 PC39 PC40 PC41 PC42 PC43
## Standard deviation 0.19245 0.18495 0.17664 0.16573 0.15789 0.15205
## Proportion of Variance 0.00046 0.00043 0.00039 0.00034 0.00031 0.00029
## Cumulative Proportion 0.99447 0.99490 0.99529 0.99563 0.99594 0.99623
## PC44 PC45 PC46 PC47 PC48 PC49
## Standard deviation 0.14815 0.14747 0.14254 0.13863 0.13489 0.12967
## Proportion of Variance 0.00027 0.00027 0.00025 0.00024 0.00023 0.00021
## Cumulative Proportion 0.99651 0.99678 0.99703 0.99727 0.99750 0.99771
## PC50 PC51 PC52 PC53 PC54 PC55
## Standard deviation 0.1258 0.12301 0.11689 0.11270 0.10676 0.10237
## Proportion of Variance 0.0002 0.00019 0.00017 0.00016 0.00014 0.00013
## Cumulative Proportion 0.9979 0.99810 0.99827 0.99843 0.99857 0.99870
## PC56 PC57 PC58 PC59 PC60 PC61
## Standard deviation 0.09850 0.09616 0.09387 0.08617 0.08329 0.07919
## Proportion of Variance 0.00012 0.00012 0.00011 0.00009 0.00009 0.00008
## Cumulative Proportion 0.99882 0.99894 0.99905 0.99914 0.99923 0.99931
## PC62 PC63 PC64 PC65 PC66 PC67
## Standard deviation 0.07690 0.07589 0.07492 0.06843 0.06641 0.06353
## Proportion of Variance 0.00007 0.00007 0.00007 0.00006 0.00006 0.00005
## Cumulative Proportion 0.99938 0.99945 0.99952 0.99958 0.99964 0.99969
## PC68 PC69 PC70 PC71 PC72 PC73
## Standard deviation 0.06106 0.05887 0.05807 0.05353 0.04898 0.04858
## Proportion of Variance 0.00005 0.00004 0.00004 0.00004 0.00003 0.00003
## Cumulative Proportion 0.99973 0.99978 0.99982 0.99985 0.99988 0.99991
## PC74 PC75 PC76 PC77 PC78 PC79
## Standard deviation 0.04352 0.03523 0.03412 0.03212 0.02638 0.02132
## Proportion of Variance 0.00002 0.00002 0.00001 0.00001 0.00001 0.00001
## Cumulative Proportion 0.99994 0.99995 0.99997 0.99998 0.99999 1.00000
## PC80
## Standard deviation 0.01997
## Proportion of Variance 0.00000
## Cumulative Proportion 1.00000
## [1] "center" "rotation" "scale" "sdev" "x"
## PC1 PC2 PC3
## Canada 0.12 0.07 0.02
## Mexico 0.05 0.02 -0.21
## Guatemala 0.09 -0.03 -0.14
## El.Salvador -0.16 0.04 0.03
## Honduras 0.10 -0.08 0.03
## Nicaragua 0.15 -0.06 0.13
## Costa.Rica 0.13 0.01 -0.13
## Panama 0.17 -0.06 0.05
## Jamaica 0.11 -0.01 -0.16
## Dominican.Rep 0.13 -0.03 -0.11
## Trin.Tobago -0.03 0.00 0.11
## Colombia 0.13 -0.01 -0.09
## Venezuela. 0.08 -0.06 -0.17
## Ecuador 0.13 -0.03 -0.03
## Peru -0.15 0.05 0.00
## Chile 0.15 -0.01 -0.13
## Brazil. 0.15 0.05 0.06
## Paraguay 0.15 -0.02 0.07
## Uruguay 0.03 0.07 -0.13
## Argentina 0.10 -0.01 0.02
## EU12 0.13 0.13 -0.05
## Sweden 0.11 0.15 0.07
## Norway 0.01 0.21 0.07
## Finland 0.04 0.18 0.15
## Denmark -0.02 0.22 0.01
## U.K. -0.07 0.17 -0.01
## Ireland -0.03 0.20 -0.08
## Luxembourg 0.04 0.21 -0.01
## Netherlands 0.02 0.22 0.01
## France 0.04 0.21 0.02
## Germany 0.03 0.21 0.00
## Austria -0.04 0.21 0.04
## Czech.Rep 0.05 0.04 -0.29
## Hungary -0.10 0.14 -0.16
## Switzerland -0.06 0.18 0.07
## Poland 0.11 -0.02 -0.21
## Russia 0.01 -0.01 -0.29
## Spain -0.04 0.21 0.05
## Portugal -0.05 0.20 -0.08
## Italy -0.01 0.20 0.04
## Greece -0.02 0.20 -0.12
## Turkey 0.08 0.12 -0.19
## Syria -0.16 0.06 0.02
## Israel 0.05 0.10 -0.06
## Jordan 0.15 -0.02 0.05
## Kuwait 0.16 0.01 -0.06
## Saudi.Arabia 0.17 -0.05 0.02
## India 0.17 -0.03 0.03
## Pakistan 0.17 -0.02 0.05
## Bangladesh 0.16 -0.02 -0.09
## Sri.Lanka. 0.12 -0.01 -0.18
## Thailand 0.15 0.05 0.10
## Malaysia 0.16 -0.01 0.10
## Singapore 0.07 0.12 0.03
## Indonesia 0.15 0.01 0.07
## Philippines 0.13 0.07 0.05
## China.PR 0.17 -0.04 -0.03
## Korea 0.08 0.11 0.08
## Hong.Kong 0.08 0.05 -0.19
## Taiwan -0.01 0.11 0.20
## Japan -0.10 0.13 0.12
## Australia 0.11 0.14 -0.01
## New.Zealand 0.02 0.18 -0.01
## Morocco 0.15 0.10 -0.08
## Algeria 0.14 -0.02 0.18
## Tunisia 0.15 0.09 0.07
## Egypt 0.10 -0.06 0.12
## Cameroon 0.08 0.15 0.16
## Senegal 0.14 0.07 0.15
## Sierra.Leone 0.14 -0.04 0.07
## Cote.d.Ivoire 0.08 0.16 0.12
## Ghana 0.15 -0.01 0.13
## Nigeria 0.13 -0.03 0.02
## Benin 0.13 0.09 -0.11
## Congo -0.09 0.15 -0.15
## Kenya 0.05 0.09 -0.16
## Tanzania 0.14 -0.10 0.03
## Mozambique 0.16 -0.05 0.07
## South.Africa 0.14 0.06 0.07
## Zambia 0.08 0.05 -0.17
biplot(data.pca, cex = 0.4)
Checo eigenvalores para entender el país con mayor impacto
eigen_valores <- eigen_aux[[1]]
min(eigen_valores)
## [1] 3.380869e-13
eigen_valores
## [1] 1.005406e-02 8.945012e-03 8.568379e-03 7.776990e-03 7.250854e-03
## [6] 6.954315e-03 6.532857e-03 6.109320e-03 5.973391e-03 5.412402e-03
## [11] 5.300003e-03 4.881245e-03 4.734992e-03 4.651038e-03 4.231763e-03
## [16] 4.090305e-03 3.926344e-03 3.578475e-03 3.360516e-03 3.109428e-03
## [21] 3.057278e-03 2.763611e-03 2.655120e-03 2.103988e-03 1.520790e-03
## [26] 1.372505e-03 1.028458e-03 9.044215e-04 7.279499e-04 6.839850e-04
## [31] 5.570557e-04 4.596366e-04 3.156799e-04 2.996670e-04 2.624843e-04
## [36] 2.079227e-04 1.597302e-04 1.164114e-04 9.288259e-05 8.109777e-05
## [41] 5.721762e-05 4.189556e-05 3.248417e-05 2.149772e-05 1.749052e-05
## [46] 1.542206e-05 1.491063e-05 1.062244e-05 8.722998e-06 6.284613e-06
## [51] 5.508072e-06 4.764207e-06 3.155471e-06 2.056780e-06 1.561885e-06
## [56] 1.178130e-06 9.867810e-07 6.894558e-07 4.981577e-07 3.442743e-07
## [61] 1.848375e-07 7.985972e-08 7.220351e-08 4.949865e-08 3.729687e-08
## [66] 2.428477e-08 1.374213e-08 5.115640e-09 3.528184e-09 1.228708e-09
## [71] 1.061051e-09 5.676294e-10 2.649419e-10 2.127209e-10 1.600379e-10
## [76] 6.403018e-11 1.690780e-11 3.567601e-12 2.994756e-12 3.380869e-13
# Calculo para primer eigenvector y promedios de precios. Después lo hago por el eigenvalor relacionado al eigenvector
pca_1 <- data.pca$rotation[, 1]
data_means <- colMeans(data)
mult_1 <- pca_1 * t(data_means)
vect_1 <- mult_1*eigen_valores[1]
# Esto lo repito para los eigenvectores y eigenvalores 2 y 3.
pca_2 <- data.pca$rotation[, 2]
mult_2 <- pca_2 * t(data_means)
vect_2 <- mult_1*eigen_valores[2]
#pca_3 <- data.pca$rotation[, 3]
#mult_3 <- pca_3 * t(data_means)
#vect_3 <- mult_3*eigen_valores[3]
# Finalmente sumo estas multiplicaciones y grafico el total respecto a sus componentes principales.
sum_fin <- vect_1+vect_2
tabla <- data.frame(t(sum_fin))
colnames(tabla)<-c('prom')
tabla$eivec1 <- pca_1
tabla$eivec2 <- pca_2
plot_ly(x=tabla$eivec1, y=tabla$eivec2, z=tabla$prom, type = "scatter3d",mode="markers", color = tabla$prom)
# En esta gráfica se puede ver un patron interesante. El país mas alejado del cero, marcado en amarillo es Ecuador, seguido por indonesia y Paraguay.
# Grafico el promedio respecto a la primera componente
ggplot(tabla, aes(x=prom, y=eivec1)) + geom_point(position = "jitter") + geom_text(label=rownames(tabla))
# Si me quedo con países entre el 0 y 0.001 tengo esto
s1 <- subset(tabla, prom<0.001)
s1 <- subset(s1, prom>0)
ggplot(s1, aes(x=prom, y=eivec1)) + geom_point(position = "jitter") + geom_text(label=rownames(s1))
A partir de esto encuentro un patrón tal vez interesante. No soy economista pero probablemente estas economías cercanas a cero en tipo de cambio representen una economía estable. :/